import altair as alt
import pandas as pd
import geopandas as gpd
# Lift Altair's row-count limit so the full population table can be embedded
# in the chart specifications.
alt.data_transformers.disable_max_rows()
# Notebook output of the call above (kept for reference; it is not a statement
# and would raise NameError if executed):
#   DataTransformerRegistry.enable('default')

data = pd.read_csv('data/population_prospects_all_countries.csv')

# Remove the 2020 rows of the 'Medium variant' series, then renumber the rows.
# NOTE(review): presumably 2020 is also present under another variant, so this
# avoids having two values for that year -- confirm against the CSV.
is_2020_medium = (data['year'] == 2020) & (data['variant'] == 'Medium variant')
data = data[~is_2020_medium].reset_index(drop=True)

# Scale by 1000 to work with exact head counts
# (assumes the source file stores population in thousands -- TODO confirm).
data['population'] = data['population'] * 1000
data.head()
| | variant | country | country_code | parent_code | year | population |
|---|---|---|---|---|---|---|
| 0 | Estimates | Burundi | 108 | 910 | 1950 | 2308927.0 |
| 1 | Estimates | Comoros | 174 | 910 | 1950 | 159459.0 |
| 2 | Estimates | Djibouti | 262 | 910 | 1950 | 62000.0 |
| 3 | Estimates | Eritrea | 232 | 910 | 1950 | 822347.0 |
| 4 | Estimates | Ethiopia | 231 | 910 | 1950 | 18128030.0 |
# Capitalised column names are the ones the chart encodings refer to.
column_labels = {'year': 'Year', 'population': 'Population', 'country': 'Country'}
data = data.rename(columns=column_labels)

# Give the projection series a more descriptive label.
data['variant'] = data['variant'].str.replace(
    'Medium variant',
    'Prediction (medium variant)',
)

# Link table used to join the map with the population data:
# ISO 3166-1 numeric code <-> ISO 3166-1 alpha-3 code.
iso_columns = ['ISO3166-1-numeric', 'ISO3166-1-Alpha-3']
country_codes = pd.read_csv('country-codes.csv', usecols=iso_columns)
country_codes.columns
# Rename to the key names used for merging: 'country_code' and 'id'.
iso_to_key = {'ISO3166-1-numeric': 'country_code', 'ISO3166-1-Alpha-3': 'id'}
country_codes = country_codes.rename(columns=iso_to_key)
country_codes.head()
| | id | country_code |
|---|---|---|
| 0 | TWN | 158.0 |
| 1 | AFG | 4.0 |
| 2 | ALB | 8.0 |
| 3 | DZA | 12.0 |
| 4 | ASM | 16.0 |
# Attach the alpha-3 'id' to every population row; a left join keeps rows
# whose numeric code has no match in the lookup table.
data = data.merge(country_codes, how='left', on='country_code')

# One row per country with the 2020 and 2100 populations side by side.
endpoint_rows = data[data['Year'].isin([2020, 2100])]
pop_growth_percent = (
    endpoint_rows
    .pivot_table(
        index=['Country', 'country_code', 'id'],
        columns=['Year'],
        values='Population',
    )
    .reset_index()
    # altair requires column names to be strings
    .rename(columns={2020: '2020', 2100: '2100'})
)

# Relative change from 2020 to 2100, as a fraction of the 2020 value
# (e.g. 0.925 means +92.5%).
pop_growth_percent['Change'] = (
    (pop_growth_percent['2100'] - pop_growth_percent['2020'])
    / pop_growth_percent['2020']
)
pop_growth_percent.head()
| Year | Country | country_code | id | 2020 | 2100 | Change |
|---|---|---|---|---|---|---|
| 0 | Afghanistan | 4 | AFG | 38928341.0 | 74937961.0 | 0.925023 |
| 1 | Albania | 8 | ALB | 2877800.0 | 1088338.0 | -0.621816 |
| 2 | Algeria | 12 | DZA | 43851043.0 | 70704619.0 | 0.612382 |
| 3 | American Samoa | 16 | ASM | 55197.0 | 36156.0 | -0.344964 |
| 4 | Andorra | 20 | AND | 77265.0 | 62406.0 | -0.192312 |
# IPython shell escape: download the country-codes lookup table from the
# `datasets/country-codes` GitHub repository into the working directory.
# NOTE(review): `country-codes.csv` is read by a pd.read_csv call earlier in
# this file, so that file must already exist before that call runs.
! wget https://raw.githubusercontent.com/datasets/country-codes/master/data/country-codes.csv
--2022-04-24 23:40:37-- https://raw.githubusercontent.com/datasets/country-codes/master/data/country-codes.csv SSL_INIT Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt' Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ... Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 129955 (127K) [text/plain] Saving to: ‘country-codes.csv.2’ country-codes.csv.2 100%[===================>] 126,91K --.-KB/s in 0,07s 2022-04-24 23:40:37 (1,69 MB/s) - ‘country-codes.csv.2’ saved [129955/129955]
# Country outlines; the 'name' column is renamed to 'Country'.
world = (
    gpd.read_file('../02/practice/data/world-countries.json')
    .rename(columns={'name': 'Country'})
)

# Some fixes to fill in ids that would otherwise fail to match
# the alpha-3 codes in the population table.
id_fixes = {
    'Western Sahara': 'ESH',  # 732
    'Somaliland': 'SOM',      # 706
    'South Sudan': 'SSD',
}
for country_name, alpha3 in id_fixes.items():
    world.loc[world['Country'] == country_name, 'id'] = alpha3

# Attach the growth figures to the shapes. 'Country' is dropped from the
# right-hand side so the merged frame keeps a single 'Country' column.
growth_by_id = pop_growth_percent.drop(columns=['Country'])
world_pop_change = world.merge(growth_by_id, how='left', on='id')
world_pop_change.head()
| | id | Country | geometry | country_code | 2020 | 2100 | Change |
|---|---|---|---|---|---|---|---|
| 0 | AFG | Afghanistan | POLYGON ((61.21082 35.65007, 62.23065 35.27066... | 4.0 | 38928341.0 | 74937961.0 | 0.925023 |
| 1 | AGO | Angola | MULTIPOLYGON (((16.32653 -5.87747, 16.57318 -6... | 24.0 | 32866268.0 | 188283132.0 | 4.728765 |
| 2 | ALB | Albania | POLYGON ((20.59025 41.85540, 20.46317 41.51509... | 8.0 | 2877800.0 | 1088338.0 | -0.621816 |
| 3 | ARE | United Arab Emirates | POLYGON ((51.57952 24.24550, 51.75744 24.29407... | 784.0 | 9890400.0 | 12909869.0 | 0.305293 |
| 4 | ARG | Argentina | MULTIPOLYGON (((-65.50000 -55.20000, -66.45000... | 32.0 | 45195777.0 | 56802493.0 | 0.256810 |
# Single-country selection keyed on the 'id' field; with empty='all',
# every country counts as selected while nothing has been picked.
selector = alt.selection_single(name='id', fields=['id'], empty='all')

# Horizontal gradient legend placed at fixed pixel coordinates.
change_legend = alt.Legend(
    title=['Change in population'],
    titleAlign='center',
    titleAnchor='middle',
    orient='none',
    legendX=490,
    legendY=10,
    direction='horizontal',
    gradientLength=150.0,
    format='%',
)

# Diverging colour scale with a domain symmetric around zero.
change_color = alt.Color(
    'Change:Q',
    scale=alt.Scale(scheme='purplegreen', domain=[-5.81, 5.81]),
    legend=change_legend,
)

map_title = alt.TitleParams(
    text='Population changes by country, 2020-2100',
    subtitle=['predicted proportional to 2020 estimates'],
)

# World map coloured by relative population change; countries outside the
# current selection are dimmed to 30% opacity.
choropleth = (
    alt.Chart(world_pop_change)
    .project(type='mercator')
    .mark_geoshape(stroke='#C2C0C0', strokeWidth=1)
    .encode(
        tooltip=[
            alt.Tooltip('Country:N'),
            alt.Tooltip('Change', format='.1%'),
        ],
        color=change_color,
        opacity=alt.condition(selector, alt.value(1.0), alt.value(0.3)),
    )
    .add_selection(selector)
    .properties(title=map_title)
)
# X axis: years 1950-2100, formatted as plain four-digit integers.
year_axis = alt.X(
    'Year:Q',
    axis=alt.Axis(format='04d'),
    scale=alt.Scale(domain=[1950, 2100]),
)

# Y axis: absolute population with SI-prefixed tick labels, fixed upper bound.
population_axis = alt.Y(
    'Population:Q',
    scale=alt.Scale(domain=[0, 1e9+7e8]),
    axis=alt.Axis(format='s'),
    title='Population',
)

# Dash pattern by variant: [1, 0] for estimates, [3, 3] for the prediction.
variant_dash = alt.StrokeDash(
    field='variant',
    scale=alt.Scale(
        domain=['Estimates', 'Prediction (medium variant)'],
        range=[[1, 0], [3, 3]],
    ),
    legend=alt.Legend(
        title='Variant',
        orient='none',
        legendX=710,
        legendY=10,
    ),
)

trend_title = alt.TitleParams(
    text=['Population changes by country, 1950-2100'],
    subtitle=['historical data and predictions, absolute figures'],
)

# One line per country (via the detail channel); lines of countries outside
# the current selection are made fully transparent.
linechart = (
    alt.Chart(data)
    .mark_line()
    .encode(
        x=year_axis,
        y=population_axis,
        detail='Country:N',
        color=alt.value('#14652E'),
        strokeDash=variant_dash,
        tooltip=alt.Tooltip(['Country:N', 'Population:Q', 'Year:Q']),
        opacity=alt.condition(selector, alt.value(1.0), alt.value(0.0)),
    )
    .add_selection(selector)
    .properties(title=trend_title)
)
# Usage instructions, rendered as a left-aligned note underneath the charts.
usage_hint = alt.TitleParams(
    text=[
        'Hover mouse cursor over a country to see its name and exact figures',
        'Click on a country on the map to highlight it and select in the line chart to the right',
    ],
    baseline='bottom',
    orient='bottom',
    anchor='start',
    fontWeight='normal',
    fontSize=12,
)

# Final dashboard: map on the left, line chart on the right, no gap between.
# Kept as the last bare expression so the notebook displays it.
alt.hconcat(
    choropleth.properties(width=650, height=550),
    linechart.properties(width=300, height=550),
    title=usage_hint,
).configure_concat(spacing=0)